// This file is part of Notepad4.
// See License.txt for details about distribution and modification.
//! Lexer for Cangjie.

#include <cassert>
#include <cstring>

#include <string>
#include <string_view>
#include <vector>

#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"

#include "WordList.h"
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "StringUtils.h"
#include "LexerModule.h"
#include "LexerUtils.h"

using namespace Lexilla;

namespace {

// Bookkeeping for one backslash escape inside a string or rune literal.
struct EscapeSequence {
	int outerState = SCE_CANGJIE_DEFAULT;	// style to restore once the escape ends
	int digitsLeft = 0;						// remaining character budget for the escape
	bool brace = false;						// set by the lexer for the braced \u{...} form

	// Begin an escape inside `state`. Any character may follow the backslash,
	// except an end-of-line character, in which case nothing is changed and
	// false is returned.
	bool resetEscapeState(int state, int chNext) noexcept {
		const bool accepted = !IsEOLChar(chNext);
		if (accepted) {
			outerState = state;
			digitsLeft = 1;
			brace = false;
		}
		return accepted;
	}
	// Consume one unit of the budget; the escape is over when the budget is
	// used up or `ch` is not a hexadecimal digit.
	bool atEscapeEnd(int ch) noexcept {
		digitsLeft -= 1;
		if (digitsLeft <= 0) {
			return true;
		}
		return !IsHexDigit(ch);
	}
};

// Line type flags kept in the two lowest bits of the per-line state.
// ColouriseCangjieDoc writes them; FoldLineState reads them back for folding.
enum {
	CangjieLineStateMaskLineComment = 1,	// line comment
	CangjieLineStateMaskImport = (1 << 1),	// import
};

// Indices into the keywordLists array handed to ColouriseCangjieDoc.
// The section between the markers below is generated automatically, so
// changes belong in the generator rather than in these lines.
//KeywordIndex++Autogenerated -- start of section automatically generated
enum {
	KeywordIndex_Keyword = 0,
	KeywordIndex_Type = 1,
	KeywordIndex_Annotation = 3,
	KeywordIndex_Class = 4,
	KeywordIndex_Struct = 5,
	KeywordIndex_Interface = 6,
	KeywordIndex_Enumeration = 7,
};
//KeywordIndex--Autogenerated -- end of section automatically generated

// Kind of name expected after a definition keyword (class, struct, func, ...).
// Each enumerator equals the SCE_CANGJIE_* style the following identifier
// should receive, so the lexer can cast the value directly to a style.
// None means no pending definition keyword.
enum class KeywordType {
	None = SCE_CANGJIE_DEFAULT,
	Type = SCE_CANGJIE_TYPE,
	Class = SCE_CANGJIE_CLASS,
	Struct = SCE_CANGJIE_STRUCT,
	Interface = SCE_CANGJIE_INTERFACE,
	Enum = SCE_CANGJIE_ENUM,
	Function = SCE_CANGJIE_FUNCTION_DEFINITION,
};

// Compile-time check that the four string styles pushed onto nestedState
// take the consecutive values immediately after DefaultNestedStateBaseStyle.
// NOTE(review): presumably required by the PackLineState/UnpackLineState
// encoding of nested states -- confirm against LexerUtils.
static_assert(DefaultNestedStateBaseStyle + 1 == SCE_CANGJIE_STRING_SQ);
static_assert(DefaultNestedStateBaseStyle + 2 == SCE_CANGJIE_STRING_DQ);
static_assert(DefaultNestedStateBaseStyle + 3 == SCE_CANGJIE_TRIPLE_STRING_SQ);
static_assert(DefaultNestedStateBaseStyle + 4 == SCE_CANGJIE_TRIPLE_STRING_DQ);

// True for styles numbered up to and including the task marker style; such
// styles are skipped when tracking the previous significant character and
// when counting visible characters for folding.
constexpr bool IsSpaceEquiv(int state) noexcept {
	return !(state > SCE_CANGJIE_TASKMARKER);
}

// True when `state` is one of the two triple-quoted string styles.
constexpr bool IsTripleString(int state) noexcept {
	switch (state) {
	case SCE_CANGJIE_TRIPLE_STRING_SQ:
	case SCE_CANGJIE_TRIPLE_STRING_DQ:
		return true;
	default:
		return false;
	}
}

// Quote character that terminates a string/rune style. The answer is derived
// from the parity of the style number: a style with the same parity as
// SCE_CANGJIE_STRING_SQ is single-quoted, otherwise it is double-quoted.
constexpr int GetStringQuote(int state) noexcept {
	constexpr bool singleQuoteIsOdd = (SCE_CANGJIE_STRING_SQ & 1) != 0;
	const bool stateIsOdd = (state & 1) != 0;
	return (stateIsOdd == singleQuoteIsOdd) ? '\'' : '\"';
}

// Lexer entry point: assigns SCE_CANGJIE_* styles to the range starting at
// startPos with length lengthDoc, and records a per-line state through styler.
//
// Parameters:
//   startPos, lengthDoc  range to style
//   initStyle            style in effect at startPos
//   keywordLists         word lists indexed by the KeywordIndex_* constants
//   styler               document accessor used for styles and line states
//
// Per-line state layout, from the lowest bit upwards:
//   2 bits  line type (CangjieLineStateMaskLineComment / CangjieLineStateMaskImport)
//   6 bits  nested block comment level
//   8 bits  number of '#' delimiters of an open raw string
//   rest    packed nestedState stack (string interpolation "${}")
void ColouriseCangjieDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, LexerWordList keywordLists, Accessor &styler) {
	int lineStateLineType = 0;
	int commentLevel = 0;	// nested block comment level

	KeywordType kwType = KeywordType::None;
	int chBeforeIdentifier = 0;

	int delimiterCount = 0;	// count of '#'
	std::vector<int> nestedState; // string interpolation "${}"

	int visibleChars = 0;
	int visibleCharsBefore = 0;
	int chPrevNonWhite = 0;
	EscapeSequence escSeq;

	StyleContext sc(startPos, lengthDoc, initStyle, styler);
	if (sc.currentLine > 0) {
		// restore the multi-line context saved at the end of the previous line
		int lineState = styler.GetLineState(sc.currentLine - 1);
		/*
		2: lineStateLineType
		6: commentLevel
		8: delimiterCount
		3: nestedState count
		3*4: nestedState
		*/
		commentLevel = (lineState >> 2) & 0x3f;
		delimiterCount = (lineState >> 8) & 0xff;
		lineState >>= 16;
		if (lineState) {
			UnpackLineState(lineState, nestedState);
		}
	}

	while (sc.More()) {
		// first decide whether the current state ends at this character
		switch (sc.state) {
		case SCE_CANGJIE_OPERATOR:
		case SCE_CANGJIE_OPERATOR2:
			sc.SetState(SCE_CANGJIE_DEFAULT);
			break;

		case SCE_CANGJIE_NUMBER:
			if (!IsDecimalNumberEx(sc.chPrev, sc.ch, sc.chNext)) {
				sc.SetState(SCE_CANGJIE_DEFAULT);
			}
			break;

		case SCE_CANGJIE_IDENTIFIER:
		case SCE_CANGJIE_IDENTIFIER_BT:
		case SCE_CANGJIE_MACRO:
		case SCE_CANGJIE_VARIABLE:
			if (!IsIdentifierChar(sc.ch)) {
				if (sc.state == SCE_CANGJIE_IDENTIFIER_BT) {
					// include the closing backtick in the identifier
					if (sc.ch == '`') {
						sc.Forward();
					}
				} else if (sc.state != SCE_CANGJIE_VARIABLE) {
					char s[128];
					sc.GetCurrent(s, sizeof(s));
					if (sc.state == SCE_CANGJIE_MACRO) {
						// TODO: prefer annotation when next line is class definition
						// s + 1 skips the leading '@' that started the macro state
						if (keywordLists[KeywordIndex_Annotation].InList(s + 1)) {
							sc.ChangeState(SCE_CANGJIE_ANNOTATION);
						}
					} else {
						if (keywordLists[KeywordIndex_Keyword].InList(s)) {
							sc.ChangeState(SCE_CANGJIE_WORD);
							if (StrEqualsAny(s, "import")) {
								// only an import at the very start of the line marks the line
								if (visibleChars == sc.LengthCurrent()) {
									lineStateLineType = CangjieLineStateMaskImport;
								}
							} else if (StrEqualsAny(s, "class", "extend", "throw", "as", "is")) {
								if (lineStateLineType != CangjieLineStateMaskImport) {
									// import as
									kwType = KeywordType::Class;
								}
							} else if (StrEqual(s, "struct")) {
								kwType = KeywordType::Struct;
							} else if (StrEqual(s, "interface")) {
								kwType = KeywordType::Interface;
							} else if (StrEqual(s, "enum")) {
								kwType = KeywordType::Enum;
							} else if (StrEqualsAny(s, "func", "macro")) {
								kwType = KeywordType::Function;
							} else if (StrEqual(s, "type")) {
								kwType = KeywordType::Type; // type alias
							}
							if (kwType != KeywordType::None) {
								// the keyword only introduces a name when an identifier follows
								const int chNext = sc.GetLineNextChar();
								if (!IsIdentifierStart(chNext)) {
									kwType = KeywordType::None;
								}
							}
						} else if (keywordLists[KeywordIndex_Type].InList(s)) {
							sc.ChangeState(SCE_CANGJIE_WORD2);
						} else if (keywordLists[KeywordIndex_Class].InList(s)) {
							sc.ChangeState(SCE_CANGJIE_CLASS);
						} else if (keywordLists[KeywordIndex_Struct].InList(s)) {
							sc.ChangeState(SCE_CANGJIE_STRUCT);
						} else if (keywordLists[KeywordIndex_Interface].InList(s)) {
							sc.ChangeState(SCE_CANGJIE_INTERFACE);
						} else if (keywordLists[KeywordIndex_Enumeration].InList(s)) {
							sc.ChangeState(SCE_CANGJIE_ENUM);
						}
					}
				}
				// plain identifiers not matched by any word list
				if (sc.state == SCE_CANGJIE_IDENTIFIER || sc.state == SCE_CANGJIE_IDENTIFIER_BT) {
					if (sc.ch != '.') {
						if (kwType != KeywordType::None) {
							// name introduced by a preceding definition keyword
							sc.ChangeState(static_cast<int>(kwType));
						} else {
							const int chNext = sc.GetLineNextChar();
							if (chNext == '(') {
								sc.ChangeState(SCE_CANGJIE_FUNCTION);
							} else if ((chBeforeIdentifier == '<' && (chNext == '>' || chNext == '<'))) {
								// type<type>
								// type<type<type>>
								// type<type, type>
								sc.ChangeState(SCE_CANGJIE_CLASS);
							}
						}
					}
				}
				// keep kwType across keywords and across '.' in qualified names
				if (sc.state != SCE_CANGJIE_WORD && sc.ch != '.') {
					kwType = KeywordType::None;
				}
				sc.SetState(SCE_CANGJIE_DEFAULT);
			}
			break;

		case SCE_CANGJIE_COMMENTLINE:
		case SCE_CANGJIE_COMMENTLINEDOC:
			if (sc.atLineStart) {
				sc.SetState(SCE_CANGJIE_DEFAULT);
			} else {
				HighlightTaskMarker(sc, visibleChars, visibleCharsBefore, SCE_CANGJIE_TASKMARKER);
			}
			break;

		case SCE_CANGJIE_COMMENTBLOCK:
		case SCE_CANGJIE_COMMENTBLOCKDOC:
			// block comments nest; only the outermost "*/" leaves the comment
			if (sc.Match('*', '/')) {
				sc.Forward();
				--commentLevel;
				if (commentLevel == 0) {
					sc.ForwardSetState(SCE_CANGJIE_DEFAULT);
				}
			} else if (sc.Match('/', '*')) {
				sc.Forward();
				++commentLevel;
			} else if (HighlightTaskMarker(sc, visibleChars, visibleCharsBefore, SCE_CANGJIE_TASKMARKER)) {
				continue;
			}
			break;

		case SCE_CANGJIE_RUNE_SQ:
		case SCE_CANGJIE_RUNE_DQ:
		case SCE_CANGJIE_STRING_SQ:
		case SCE_CANGJIE_STRING_DQ:
		case SCE_CANGJIE_TRIPLE_STRING_SQ:
		case SCE_CANGJIE_TRIPLE_STRING_DQ:
			if (sc.atLineStart && !IsTripleString(sc.state)) {
				// only triple-quoted strings continue onto the next line
				sc.SetState(SCE_CANGJIE_DEFAULT);
			} else if (sc.ch == '\\') {
				if (escSeq.resetEscapeState(sc.state, sc.chNext)) {
					sc.SetState(SCE_CANGJIE_ESCAPECHAR);
					sc.Forward();
					if (sc.Match('u', '{')) {
						// braced form \u{...}: enlarge the budget consumed by atEscapeEnd()
						escSeq.brace = true;
						escSeq.digitsLeft = 9;
						sc.Forward();
					}
				}
			} else if (sc.Match('$', '{') && sc.state < SCE_CANGJIE_RUNE_SQ) {
				// string interpolation: remember the string style to return to at '}'
				nestedState.push_back(sc.state);
				sc.SetState(SCE_CANGJIE_OPERATOR2);
				sc.Forward();
			} else if (sc.ch == GetStringQuote(sc.state) && (!IsTripleString(sc.state) || sc.MatchNext())) {
				if (IsTripleString(sc.state)) {
					sc.Advance(2);
				}
				sc.ForwardSetState(SCE_CANGJIE_DEFAULT);
			}
			break;

		case SCE_CANGJIE_RAWSTRING_SQ:
		case SCE_CANGJIE_RAWSTRING_DQ:
			// a raw string ends at its quote followed by delimiterCount '#'
			if (sc.chNext == '#' && sc.ch == GetStringQuote(sc.state)) {
				int count = delimiterCount;
				do {
					sc.Forward();
					--count;
				} while (count != 0 && sc.ch == '#');
				if (count == 0) {
					delimiterCount = 0;
					sc.ForwardSetState(SCE_CANGJIE_DEFAULT);
				}
			}
			break;

		case SCE_CANGJIE_ESCAPECHAR:
			if (escSeq.atEscapeEnd(sc.ch)) {
				if (escSeq.brace && sc.ch == '}') {
					sc.Forward();
				}
				sc.SetState(escSeq.outerState);
				// re-examine the current character in the enclosing string state
				continue;
			}
			break;
		}

		// then decide whether a new token starts at this character
		if (sc.state == SCE_CANGJIE_DEFAULT) {
			if (sc.ch == '/' && (sc.chNext == '/' || sc.chNext == '*')) {
				visibleCharsBefore = visibleChars;
				const int chNext = sc.chNext;
				sc.SetState((chNext == '/') ? SCE_CANGJIE_COMMENTLINE : SCE_CANGJIE_COMMENTBLOCK);
				sc.Forward(2);
				// exactly one more '/' or '*' switches to the doc comment style
				if (sc.ch == chNext && sc.chNext != chNext) {
					static_assert(SCE_CANGJIE_COMMENTLINEDOC - SCE_CANGJIE_COMMENTLINE == SCE_CANGJIE_COMMENTBLOCKDOC - SCE_CANGJIE_COMMENTBLOCK);
					sc.ChangeState(sc.state + SCE_CANGJIE_COMMENTLINEDOC - SCE_CANGJIE_COMMENTLINE);
				}
				if (chNext == '/') {
					// a line holding nothing but a line comment is flagged for folding
					if (visibleChars == 0) {
						lineStateLineType = CangjieLineStateMaskLineComment;
					}
				} else {
					commentLevel = 1;
				}
				continue;
			}
			if (sc.ch == 'r' && (sc.chNext == '\'' || sc.chNext == '"')) {
				// rune literal: r'x' or r"x"
				sc.SetState((sc.chNext == '\'') ? SCE_CANGJIE_RUNE_SQ : SCE_CANGJIE_RUNE_DQ);
				sc.Forward();
			} else if (sc.ch == '"') {
				if (sc.MatchNext('"', '"')) {
					sc.SetState(SCE_CANGJIE_TRIPLE_STRING_DQ);
					sc.Advance(2);
				} else {
					sc.SetState(SCE_CANGJIE_STRING_DQ);
				}
			} else if (sc.ch == '\'') {
				if (sc.MatchNext('\'', '\'')) {
					sc.SetState(SCE_CANGJIE_TRIPLE_STRING_SQ);
					sc.Advance(2);
				} else {
					sc.SetState(SCE_CANGJIE_STRING_SQ);
				}
			} else if (IsNumberStartEx(sc.chPrev, sc.ch, sc.chNext)) {
				sc.SetState(SCE_CANGJIE_NUMBER);
			} else if (sc.ch == '@' && IsIdentifierStart(sc.chNext)) {
				sc.SetState(SCE_CANGJIE_MACRO);
			} else if (sc.ch == '$' && IsIdentifierChar(sc.chNext)) {
				sc.SetState(SCE_CANGJIE_VARIABLE);
			} else if (IsIdentifierStart(sc.ch) || (sc.ch == '`' && IsIdentifierStart(sc.chNext))) {
				// for qualified names keep the character seen before the first component
				if (chPrevNonWhite != '.') {
					chBeforeIdentifier = chPrevNonWhite;
				}
				sc.SetState((sc.ch == '`') ? SCE_CANGJIE_IDENTIFIER_BT : SCE_CANGJIE_IDENTIFIER);
			} else if (sc.ch == '#') {
				sc.SetState(SCE_CANGJIE_OPERATOR);
				int count = 1;
				while (sc.chNext == '#') {
					++count;
					sc.Forward();
				}
				if (sc.chNext == '\'' || sc.chNext == '"') {
					delimiterCount = count;
					// '#' followed by a quote opens a raw string for either quote kind;
					// the raw string state is the only one that matches the closing
					// quote + '#' run and resets delimiterCount.
					sc.ChangeState((sc.chNext == '\'') ? SCE_CANGJIE_RAWSTRING_SQ : SCE_CANGJIE_RAWSTRING_DQ);
					sc.Forward();
				}
			} else if (IsAGraphic(sc.ch)) {
				sc.SetState(SCE_CANGJIE_OPERATOR);
				if (sc.Match('<', ':')) {
					// the name after "<:" is styled as a class
					kwType = KeywordType::Class;
				}
				if (!nestedState.empty()) {
					// inside "${ ... }": track braces so the matching '}' resumes the string
					sc.ChangeState(SCE_CANGJIE_OPERATOR2);
					if (sc.ch == '{') {
						nestedState.push_back(SCE_CANGJIE_DEFAULT);
					} else if (sc.ch == '}') {
						const int outerState = TakeAndPop(nestedState);
						sc.ForwardSetState(outerState);
						continue;
					}
				}
			}
		}

		if (!isspacechar(sc.ch)) {
			visibleChars++;
			if (!IsSpaceEquiv(sc.state)) {
				chPrevNonWhite = sc.ch;
			}
		}
		if (sc.atLineEnd) {
			// persist the multi-line context, then reset the per-line trackers
			int lineState = (commentLevel << 2) | (delimiterCount << 8) | lineStateLineType;
			if (!nestedState.empty()) {
				lineState |= PackLineState(nestedState) << 16;
			}
			styler.SetLineState(sc.currentLine, lineState);
			lineStateLineType = 0;
			visibleChars = 0;
			visibleCharsBefore = 0;
			kwType = KeywordType::None;
		}
		sc.Forward();
	}

	sc.Complete();
}

struct FoldLineState {
	int lineComment;
	int packageImport;
	constexpr explicit FoldLineState(int lineState) noexcept:
		lineComment(lineState & CangjieLineStateMaskLineComment),
		packageImport((lineState >> 1) & 1) {
	}
};

// Folder entry point: computes the fold level of every line in the range
// starting at startPos with length lengthDoc, using the styles and line
// states already stored in styler.
//
// Fold points come from: nested block comments, triple-quoted and raw strings,
// the brackets {} [] (), runs of line-comment lines, runs of import lines, and
// an opening brace reported by CheckBraceOnNextLine.
//
// Each stored level keeps the line's own level in the low 16 bits and the
// level for the following line in the high 16 bits.
void FoldCangjieDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, LexerWordList /*keywordLists*/, Accessor &styler) {
	const Sci_PositionU endPos = startPos + lengthDoc;
	Sci_Line lineCurrent = styler.GetLine(startPos);
	FoldLineState foldPrev(0);
	int levelCurrent = SC_FOLDLEVELBASE;
	if (lineCurrent > 0) {
		// resume from the "next level" stored with the previous line
		levelCurrent = styler.LevelAt(lineCurrent - 1) >> 16;
		foldPrev = FoldLineState(styler.GetLineState(lineCurrent - 1));
		// NOTE(review): CheckBraceOnNextLine presumably returns the position of an
		// opening brace found at the start of the following line, or 0 -- confirm in LexerUtils.
		const Sci_PositionU bracePos = CheckBraceOnNextLine(styler, lineCurrent - 1, SCE_CANGJIE_OPERATOR, SCE_CANGJIE_TASKMARKER);
		if (bracePos) {
			startPos = bracePos + 1; // skip the brace
		}
	}

	int levelNext = levelCurrent;
	FoldLineState foldCurrent(styler.GetLineState(lineCurrent));
	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1);
	lineStartNext = sci::min(lineStartNext, endPos);

	char chNext = styler[startPos];
	int styleNext = styler.StyleAt(startPos);
	int style = initStyle;
	int visibleChars = 0;

	while (startPos < endPos) {
		// slide the one-character window: (ch, style) is current, (chNext, styleNext) is lookahead
		const char ch = chNext;
		const int stylePrev = style;
		style = styleNext;
		chNext = styler[++startPos];
		styleNext = styler.StyleAt(startPos);

		switch (style) {
		case SCE_CANGJIE_COMMENTBLOCKDOC:
		case SCE_CANGJIE_COMMENTBLOCK: {
			// "/*" opens a level and "*/" closes one
			const int level = (ch == '/' && chNext == '*') ? 1 : ((ch == '*' && chNext == '/') ? -1 : 0);
			if (level != 0) {
				levelNext += level;
				// step over the second character of the two-character delimiter
				startPos++;
				chNext = styler[startPos];
				styleNext = styler.StyleAt(startPos);
			}
		} break;

		case SCE_CANGJIE_TRIPLE_STRING_SQ:
		case SCE_CANGJIE_TRIPLE_STRING_DQ:
		case SCE_CANGJIE_RAWSTRING_SQ:
		case SCE_CANGJIE_RAWSTRING_DQ:
			// open a level where the string style begins, close it where it ends
			if (style != stylePrev) {
				levelNext++;
			}
			if (style != styleNext) {
				levelNext--;
			}
			break;

		case SCE_CANGJIE_OPERATOR:
		case SCE_CANGJIE_OPERATOR2:
			if (ch == '{' || ch == '[' || ch == '(') {
				levelNext++;
			} else if (ch == '}' || ch == ']' || ch == ')') {
				levelNext--;
			}
			break;
		}

		// used as a flag: set once the line contains a character with a non-space-equivalent style
		if (visibleChars == 0 && !IsSpaceEquiv(style)) {
			++visibleChars;
		}
		if (startPos == lineStartNext) {
			// end of the current line reached: finalize and store its level
			const FoldLineState foldNext(styler.GetLineState(lineCurrent + 1));
			levelNext = sci::max(levelNext, SC_FOLDLEVELBASE);
			if (foldCurrent.lineComment) {
				// +1 on the first line of a line-comment run, -1 on the last one
				levelNext += foldNext.lineComment - foldPrev.lineComment;
			} else if (foldCurrent.packageImport) {
				// same scheme for a run of import lines
				levelNext += foldNext.packageImport - foldPrev.packageImport;
			} else if (visibleChars) {
				const Sci_PositionU bracePos = CheckBraceOnNextLine(styler, lineCurrent, SCE_CANGJIE_OPERATOR, SCE_CANGJIE_TASKMARKER);
				if (bracePos) {
					// count the brace on the following line as opening the fold on this line
					levelNext++;
					startPos = bracePos + 1; // skip the brace
					style = SCE_CANGJIE_OPERATOR;
					chNext = styler[startPos];
					styleNext = styler.StyleAt(startPos);
				}
			}

			const int levelUse = levelCurrent;
			int lev = levelUse | (levelNext << 16);
			if (levelUse < levelNext) {
				// the line opens a deeper level, so it becomes a fold header
				lev |= SC_FOLDLEVELHEADERFLAG;
			}
			styler.SetLevel(lineCurrent, lev);

			// advance the per-line bookkeeping to the next line
			lineCurrent++;
			lineStartNext = styler.LineStart(lineCurrent + 1);
			lineStartNext = sci::min(lineStartNext, endPos);
			levelCurrent = levelNext;
			foldPrev = foldCurrent;
			foldCurrent = foldNext;
			visibleChars = 0;
		}
	}
}

}

// Lexer module object for Cangjie: ties the SCLEX_CANGJIE id and the name "cj"
// to the colourise and fold functions defined in this file.
LexerModule lmCangjie(SCLEX_CANGJIE, ColouriseCangjieDoc, "cj", FoldCangjieDoc);
